Differential expression using the bulk RNAseq JAX_RNAseq_Neuroectoderm data from the June 2024 release.

Run DESeq2 on day 3_4 group with day indicator

day 9 group run separately

leave day 14 out

R libraries.

library(ggplot2)
library(ggrepel)
library(ggpubr)
library(stringr)
library(MASS)
library(RColorBrewer)
library(DESeq2)

library(viridis)
library(ggpointdensity)
library(dplyr)
library(data.table)
library(ComplexHeatmap)
library(UpSetR)
library(readxl)
theme_set(theme_classic())

path = "../../data/MorPhiC/June-2024/JAX_RNAseq_Neuroectoderm/"
dat_path = paste0(path, "processed/release110-gencode44/Tables")

Read in meta data

meta = fread("data/June_2024/JAX_RNAseq_Neuroectoderm_meta_data.tsv", 
             data.table = FALSE)
dim(meta)
## [1] 174  32
meta[1:2,]
##   biomaterial_id lib_biomaterial_id differentiated_biomaterial_id
## 1    MOK20002-WT         GT23-05635          CBO-MOK20002-WT-Day7
## 2    MOK20002-WT         GT23-05625          CBO-MOK20002-WT-Day6
##    differentiated_biomaterial_description differentiation_protocol
## 1 KOLF2.2 derived cortical brain organoid                 JAXPD003
## 2 KOLF2.2 derived cortical brain organoid                 JAXPD003
##   differentiated_timepoint_value differentiated_timepoint_unit
## 1                              7                          days
## 2                              6                          days
##   differentiated_terminally_differentiated
## 1                                       No
## 2                                       No
##                                   model_organ ko_strategy ko_gene
## 1 Cortical brain (dorsal forebrain pattering)          WT      WT
## 2 Cortical brain (dorsal forebrain pattering)          WT      WT
##   library_preparation_protocol dissociation_protocol fragment_size input_amount
## 1                     JAXPL001                   N.A           410          300
## 2                     JAXPL001                   N.A           425          300
##   input_unit final_yield final_yield_unit lib_concentration
## 1        ngs       249.6              ngs              51.8
## 2        ngs       170.0              ngs              33.8
##   lib_concentration_unit PCR_cycle PCR_cycle_sample_index
## 1                     nM        10                     NA
## 2                     nM        10                     NA
##                                             file_id sequencing_protocol
## 1 RFX_WT_Day7_GT23-05635_GCACGGAC-TGCGAGAC_S55_L002            JAXPS001
## 2 RFX_WT_Day6_GT23-05625_GACCTGAA-CTCACCAA_S97_L002            JAXPS001
##                       run_id biomaterial_description
## 1 20230424_23-scbct-028-run3                 KOLF2.2
## 2 20230424_23-scbct-028-run3                 KOLF2.2
##   derived_cell_line_accession clone_id protocol_id zygosity type model_system
## 1                    KOLF2.2J       WT         N.A      N.A iPSC          CBO
## 2                    KOLF2.2J       WT         N.A      N.A iPSC          CBO
names(meta)
##  [1] "biomaterial_id"                          
##  [2] "lib_biomaterial_id"                      
##  [3] "differentiated_biomaterial_id"           
##  [4] "differentiated_biomaterial_description"  
##  [5] "differentiation_protocol"                
##  [6] "differentiated_timepoint_value"          
##  [7] "differentiated_timepoint_unit"           
##  [8] "differentiated_terminally_differentiated"
##  [9] "model_organ"                             
## [10] "ko_strategy"                             
## [11] "ko_gene"                                 
## [12] "library_preparation_protocol"            
## [13] "dissociation_protocol"                   
## [14] "fragment_size"                           
## [15] "input_amount"                            
## [16] "input_unit"                              
## [17] "final_yield"                             
## [18] "final_yield_unit"                        
## [19] "lib_concentration"                       
## [20] "lib_concentration_unit"                  
## [21] "PCR_cycle"                               
## [22] "PCR_cycle_sample_index"                  
## [23] "file_id"                                 
## [24] "sequencing_protocol"                     
## [25] "run_id"                                  
## [26] "biomaterial_description"                 
## [27] "derived_cell_line_accession"             
## [28] "clone_id"                                
## [29] "protocol_id"                             
## [30] "zygosity"                                
## [31] "type"                                    
## [32] "model_system"
table(table(meta$biomaterial_id))
## 
##  1  2  3  4 
## 36  5 16 20
table(table(meta$lib_biomaterial_id))
## 
##   1 
## 174
table(table(meta$differentiated_biomaterial_id))
## 
##   1 
## 174

Manually correct a typo in the metadata

For the row with column differentiated_biomaterial_id==“CBO-MOK20004-WT-Day3”, it should have value 3 instead of the original 4 in the column differentiated_timepoint_value.

print("Before correcting the error in timepoint:")
## [1] "Before correcting the error in timepoint:"
print("Table between ko_gene and differentiated_timepoint_value:")
## [1] "Table between ko_gene and differentiated_timepoint_value:"
table(meta$ko_gene, meta$differentiated_timepoint_value)
##        
##         3 4 5 6 7 8 9 14
##   FEZF2 0 0 9 9 9 9 0  0
##   LHX5  9 9 9 9 0 0 0  0
##   LHX9  0 0 0 0 0 0 0  9
##   OTX1  0 0 0 0 9 9 9  0
##   PAX6  0 0 0 0 9 0 0  0
##   RFX4  0 0 0 9 9 9 9  0
##   WT    0 2 2 5 4 5 2  1
meta[which(meta$differentiated_biomaterial_id=="CBO-MOK20004-WT-Day3"), 
     "differentiated_timepoint_value"] = 3

print("After correcting the error in timepoint:")
## [1] "After correcting the error in timepoint:"
print("Table between ko_gene and differentiated_timepoint_value:")
## [1] "Table between ko_gene and differentiated_timepoint_value:"
table(meta$ko_gene, meta$differentiated_timepoint_value)
##        
##         3 4 5 6 7 8 9 14
##   FEZF2 0 0 9 9 9 9 0  0
##   LHX5  9 9 9 9 0 0 0  0
##   LHX9  0 0 0 0 0 0 0  9
##   OTX1  0 0 0 0 9 9 9  0
##   PAX6  0 0 0 0 9 0 0  0
##   RFX4  0 0 0 9 9 9 9  0
##   WT    1 1 2 5 4 5 2  1

Read in gene count data and filter genes

cts = fread(file.path(dat_path, "genesCounts.csv"), data.table = FALSE)
dim(cts)
## [1] 38592   175
cts[1:2, c(1:2, (ncol(cts)-1):ncol(cts))]
##              Name RFX_CE_F5_Day8_GT23-05641_GTCGGAGC-TTATAACC_S73_L002
## 1 ENSG00000268674                                                    0
## 2 ENSG00000271254                                                 1521
##   LHX5_CE_A1_Day3_GT23-07300_AAGTCCAA-TACTCATA_S163_L004
## 1                                                      0
## 2                                                   1270
##   LHX5_KO_E4_Day5_GT23-07318_AACGTTCC-AGTACTCC_S166_L004
## 1                                                      0
## 2                                                    746
stopifnot(setequal(names(cts)[-1], meta$file_id))

meta = meta[match(names(cts)[-1], meta$file_id),]
table(names(cts)[-1] == meta$file_id)
## 
## TRUE 
##  174
cts_dat = data.matrix(cts[,-1])
rownames(cts_dat) = cts$Name

unique_timepoints = unique(meta$differentiated_timepoint_value)
unique_timepoints = sort(unique_timepoints)

xx = as.character(meta$differentiated_timepoint_value)
xx = factor(xx, levels = as.character(unique_timepoints))
meta$timepoint = xx

Discard genes whose expression is 0 in more than 75% of samples

model_s = meta$model_system
table(model_s, useNA="ifany")
## model_s
## CBO 
## 174
get_q75 <- function(x){quantile(x, probs = 0.75)}

gene_info = data.frame(Name = cts$Name, 
                       apply(cts_dat, 1, tapply, model_s, min), 
                       apply(cts_dat, 1, tapply, model_s, median), 
                       apply(cts_dat, 1, tapply, model_s, get_q75), 
                       apply(cts_dat, 1, tapply, model_s, max))

dim(gene_info)
## [1] 38592     5
gene_info[1:2, ]
##                            Name apply.cts_dat..1..tapply..model_s..min.
## ENSG00000268674 ENSG00000268674                                       0
## ENSG00000271254 ENSG00000271254                                     426
##                 apply.cts_dat..1..tapply..model_s..median.
## ENSG00000268674                                          0
## ENSG00000271254                                       1318
##                 apply.cts_dat..1..tapply..model_s..get_q75.
## ENSG00000268674                                        0.00
## ENSG00000271254                                     1701.75
##                 apply.cts_dat..1..tapply..model_s..max.
## ENSG00000268674                                       3
## ENSG00000271254                                    3117
table(row.names(gene_info)==gene_info$Name)
## 
##  TRUE 
## 38592
names(gene_info)[2:5] = paste0(rep(c("CBO_"), times=4), 
                               rep(c("min", "median", "q75", "max"), each=1))
dim(gene_info)
## [1] 38592     5
gene_info[1:2,]
##                            Name CBO_min CBO_median CBO_q75 CBO_max
## ENSG00000268674 ENSG00000268674       0          0    0.00       3
## ENSG00000271254 ENSG00000271254     426       1318 1701.75    3117
summary(gene_info[,-1])
##     CBO_min          CBO_median          CBO_q75            CBO_max       
##  Min.   :    0.0   Min.   :     0.0   Min.   :     0.0   Min.   :      0  
##  1st Qu.:    0.0   1st Qu.:     0.0   1st Qu.:     0.0   1st Qu.:      2  
##  Median :    0.0   Median :     3.0   Median :     5.8   Median :     22  
##  Mean   :  213.8   Mean   :   699.1   Mean   :   931.4   Mean   :   1829  
##  3rd Qu.:   89.0   3rd Qu.:   332.1   3rd Qu.:   474.6   3rd Qu.:   1003  
##  Max.   :88969.0   Max.   :489181.5   Max.   :586205.0   Max.   :1301990
table(gene_info$CBO_q75 > 0)
## 
## FALSE  TRUE 
## 14390 24202
w2kp = which(gene_info$CBO_q75 > 0)
cts_dat = cts_dat[w2kp,]
dim(cts_dat)
## [1] 24202   174
gene_info = gene_info[w2kp,]

meta$read_depth = colSums(cts_dat)/1e6
meta$q75 = apply(cts_dat, 2, quantile, probs = 0.75)

ggplot(meta, aes(x=read_depth, y=q75, color=ko_strategy)) +
    geom_point(size=2, alpha=0.7) + ggtitle("Gene counts") + 
  scale_shape_manual(values = c(7, 16)) + 
  xlab("read depth (million)") + ylab("75 percentile of gene expression") + 
  labs(color = "KO type") + 
  scale_color_brewer(palette = "Set1")

meta$run_id_short = str_replace(meta$run_id, "^[^-]*-", "")

ggplot(meta, aes(x=read_depth, y=q75, color=run_id_short)) +
    geom_point(size=2, alpha=0.7) + ggtitle("Gene counts") + 
  scale_shape_manual(values = c(7, 16)) + 
  xlab("read depth (million)") + ylab("75 percentile of gene expression") + 
  labs(color = "Run ID") + 
  scale_color_brewer(palette = "Set1")

Check the effect of run id among WT

sample2kp = which(meta$ko_gene == "WT")
cts_dat_m = cts_dat[,sample2kp]
meta_m    = meta[sample2kp,]
summary(meta_m$q75)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   424.0   654.0   876.0   880.7  1047.0  1276.0
dim(cts_dat_m)
## [1] 24202    21
stopifnot(all(meta_m$file_id == colnames(cts_dat_m)))

q75 = apply(cts_dat_m, 1, quantile, probs=0.75)
cts_dat_n = t(cts_dat_m[q75 > 0,])
cts_dat_n = log(median(meta_m$q75)*cts_dat_n/meta_m$q75 + 1)
dim(cts_dat_n)
## [1]    21 23745
sample_pca = prcomp(cts_dat_n, center = TRUE, scale. = TRUE)
names(sample_pca)
## [1] "sdev"     "rotation" "center"   "scale"    "x"
pc_scores = sample_pca$x
eigen_vals = sample_pca$sdev^2
eigen_vals[1:5]/sum(eigen_vals)
## [1] 0.27258414 0.16356916 0.10631824 0.07678308 0.05052017
pvs = round(eigen_vals[1:5]/sum(eigen_vals)*100,1)
pvs
## [1] 27.3 16.4 10.6  7.7  5.1
PC_df = data.frame(pc_scores)
PC_df = cbind(PC_df, meta_m)

# there is only one level for model_system here
# no need to use model_system for the shape
gPC = ggplot(PC_df, aes(x = PC1, y = PC2, 
                        color=run_id_short)) +
  geom_point(size=2.5, alpha=0.7) + 
  xlab(sprintf("PC1 (%.1f%% variance)", pvs[1])) + 
  ylab(sprintf("PC2 (%.1f%% variance)", pvs[2])) + 
  ggtitle("Wild type samples") + 
  guides(
    color = guide_legend(title = NULL, order = 1),
    shape = guide_legend(title = NULL, order = 2)
  ) + 
  theme_classic() + 
  theme(
    legend.margin = margin(0, 0, 0, 0), 
    legend.box.just = "left", 
    legend.direction = "vertical" 
  )

print(gPC)

table(PC_df$run_id_short, PC_df$differentiated_timepoint_value)
##                 
##                  3 4 5 6 7 8 9 14
##   robson-003     0 0 0 0 0 0 1  0
##   robson-004     0 0 0 3 0 0 0  0
##   robson-006     0 0 0 0 0 3 0  0
##   robson-020     0 0 0 0 1 1 1  0
##   scbct-008      0 0 0 0 1 0 0  0
##   scbct-028-run3 0 0 0 1 1 1 0  0
##   scbct-039-run2 0 0 1 1 1 0 0  0
##   scbct-052      1 1 1 0 0 0 0  0
##   scbct-092      0 0 0 0 0 0 0  1
n_shapes = length(unique(PC_df$run_id_short))
u_shapes = c(1, 7, 8, 9, 10, 11, 15, 16, 17)[1:n_shapes]

n_time_point = length(unique(PC_df$timepoint))

gPC_time = ggplot(PC_df, aes(x = PC1, y = PC2, 
                        color=timepoint,
                        shape=run_id_short)) +
  geom_point(size=2.5) + 
  scale_shape_manual(values = u_shapes) + 
  scale_color_brewer(palette = "Dark2") +
  xlab(sprintf("PC1 (%.1f%% variance)", pvs[1])) + 
  ylab(sprintf("PC2 (%.1f%% variance)", pvs[2])) + 
  ggtitle("Wild type samples") + 
  guides(
    color = guide_legend(title = NULL, order = 1),
    shape = guide_legend(title = NULL, order = 2)
  ) + 
  theme_classic() + 
  theme(
    legend.margin = margin(0, 0, 0, 0), 
    legend.box.just = "left", 
    legend.direction = "vertical" 
  )

print(gPC_time)

Analyze samples of different model systems

For the June 2024 release of JAX_RNAseq_Neuroectoderm, there is only one model system.

table(meta$run_id, meta$model_system)
##                             
##                              CBO
##   20230228_23-scbct-008       10
##   20230424_23-scbct-028-run3  30
##   20230508_23-scbct-039-run2  30
##   20230522_23-scbct-052       30
##   20230824_23-scbct-092       10
##   20240131_23-robson-020      30
##   20240307_24-robson-003      10
##   20240307_24-robson-004      12
##   20240307_24-robson-006      12
table(meta$run_id, meta$ko_gene)
##                             
##                              FEZF2 LHX5 LHX9 OTX1 PAX6 RFX4 WT
##   20230228_23-scbct-008          0    0    0    0    9    0  1
##   20230424_23-scbct-028-run3     0    0    0    0    0   27  3
##   20230508_23-scbct-039-run2    27    0    0    0    0    0  3
##   20230522_23-scbct-052          0   27    0    0    0    0  3
##   20230824_23-scbct-092          0    0    9    0    0    0  1
##   20240131_23-robson-020         0    0    0   27    0    0  3
##   20240307_24-robson-003         0    0    0    0    0    9  1
##   20240307_24-robson-004         0    9    0    0    0    0  3
##   20240307_24-robson-006         9    0    0    0    0    0  3
table(meta$ko_strategy, meta$ko_gene)
##      
##       FEZF2 LHX5 LHX9 OTX1 PAX6 RFX4 WT
##   CE     12   12    3    9    3   12  0
##   KO     12   12    3    9    3   12  0
##   PTC    12   12    3    9    3   12  0
##   WT      0    0    0    0    0    0 21
df1 = as.data.frame(table(meta$timepoint, meta$ko_gene))
names(df1)[1:2] = c("time", "gene")
df1$Freq[df1$gene != "WT"] = df1$Freq[df1$gene != "WT"]/3

ggplot(df1, aes(x = time, y = gene, fill = Freq)) + 
  geom_tile(color = "black", linewidth = 0.3) + 
  scale_fill_gradient(low = "white", high = "red") +
  theme_minimal() + 
  geom_text(aes(label = ifelse(gene == "WT", Freq, "")), 
            color = "black", size = 3) + 
  labs(x = "Time", y = "Gene")

for(model1 in unique(meta$model_system)){
  print(model1)
  sample2kp = which(meta$model_system == model1)
  cts_dat_m = cts_dat[,sample2kp]
  meta_m    = meta[sample2kp,]
 
  table(meta_m$ko_strategy)
  stopifnot(all(meta_m$file_id == colnames(cts_dat_m)))
  q75 = apply(cts_dat_m, 1, quantile, probs=0.75)

  cts_dat_n = t(cts_dat_m[q75 > 0,])
  cts_dat_n = log(median(meta_m$q75)*cts_dat_n/meta_m$q75 + 1)
  dim(cts_dat_n)
  
  sample_pca = prcomp(cts_dat_n, center = TRUE, scale. = TRUE)
  names(sample_pca)
  pc_scores = sample_pca$x
  eigen_vals = sample_pca$sdev^2
  eigen_vals[1:5]/sum(eigen_vals)
  pvs = round(eigen_vals[1:5]/sum(eigen_vals)*100,1)
  pvs
  
  PC_df = data.frame(pc_scores)
  PC_df = cbind(PC_df, meta_m)
  
  gPC = ggplot(PC_df, aes(x = PC1, y = PC2, shape=ko_strategy, color=ko_gene)) +
    geom_point(size=2.5, alpha=0.7) + scale_color_brewer(palette = "Dark2") +
      scale_shape_manual(values = c(7, 15, 16, 17)) + 
    xlab(sprintf("PC1 (%.1f%% variance)", pvs[1])) + 
    ylab(sprintf("PC2 (%.1f%% variance)", pvs[2])) + 
    ggtitle(unique(model_s)) + 
    guides(
      color = guide_legend(title = NULL, order = 1),
      shape = guide_legend(title = NULL, order = 2)
    ) + 
    theme_classic() + 
    theme(
      legend.margin = margin(0, 0, 0, 0), 
      legend.box.just = "left", 
      legend.direction = "vertical" 
    )

  print(gPC)
  
  # too many levels for shape, need to manually specify it
  print(length(unique(PC_df$ko_gene)))
  print(length(unique(PC_df$run_id_short)))
  
  gPC = ggplot(PC_df, aes(x = PC1, y = PC2, shape=run_id_short, color=ko_gene)) +
    geom_point(size=2.5, alpha=0.7) + 
    scale_color_brewer(palette = "Dark2") +
    scale_shape_manual(values = c(1, 7, 8, 9, 10, 11, 15, 16, 17)) + 
    xlab(sprintf("PC1 (%.1f%% variance)", pvs[1])) + 
    ylab(sprintf("PC2 (%.1f%% variance)", pvs[2])) + 
    ggtitle(unique(model_s)) + 
    guides(
      color = guide_legend(title = NULL, order = 1),
      shape = guide_legend(title = NULL, order = 2)
    ) +
    theme_classic() + 
    theme(
      legend.margin = margin(0, 0, 0, 0), 
      legend.box.just = "left", 
      legend.direction = "vertical" 
    )

  print(gPC)
  
  print(table(meta_m$run_id_short, meta_m$ko_gene))
  
  colnames(PC_df)[which(colnames(PC_df)=="timepoint")] = "day"
  
  gPC = ggplot(PC_df, aes(x = PC1, y = PC2, shape=ko_gene, color=day)) +
    geom_point(size=2.5, alpha=0.7) + 
    scale_color_brewer(palette = "Dark2") +
    scale_shape_manual(values = c(1, 7, 8, 9, 10, 11, 15, 16, 17)[1:length(unique(PC_df$ko_gene))]) + 
    xlab(sprintf("PC1 (%.1f%% variance)", pvs[1])) + 
    ylab(sprintf("PC2 (%.1f%% variance)", pvs[2])) + 
    ggtitle(unique(model_s)) + 
    guides(
      color = guide_legend(title = NULL, order = 1),
      shape = guide_legend(title = NULL, order = 2)
    ) +
    theme_classic() + 
    theme(
      legend.margin = margin(0, 0, 0, 0), 
      legend.box.just = "left", 
      legend.direction = "vertical" 
    )

  print(gPC)
  
  
  gPC = ggplot(PC_df, aes(x = PC1, y = PC2, shape=ko_gene, color=day, 
                          alpha=ifelse(ko_gene == "WT", 1, 0.6))) +
    geom_point(size=2.5) + 
    scale_color_brewer(palette = "Dark2") +
    scale_shape_manual(values = c(1, 7, 8, 9, 10, 11, 15, 16, 17)[1:length(unique(PC_df$ko_gene))]) + 
    xlab(sprintf("PC1 (%.1f%% variance)", pvs[1])) + 
    ylab(sprintf("PC2 (%.1f%% variance)", pvs[2])) + 
    ggtitle(unique(model_s)) + 
    guides(
      color = guide_legend(title = NULL, order = 1),
      shape = guide_legend(title = NULL, order = 2), 
      alpha = "none"
    ) +
    theme_classic() + 
    theme(
      legend.margin = margin(0, 0, 0, 0), 
      legend.box.just = "left", 
      legend.direction = "vertical" 
    )

  print(gPC)

  ## Generate sample information matrix
  
  cols2kp = c("ko_strategy", "ko_gene", "run_id", "run_id_short", 
              "timepoint", "q75")
  sample_info = meta_m[,cols2kp]
  colnames(sample_info)[which(colnames(sample_info)=="timepoint")] = "day"
  
  dim(sample_info)
  sample_info[1:2,]
  
  table(sample_info$ko_strategy, sample_info$ko_gene)
  
  sample_info$ko_group = paste(sample_info$ko_gene, 
                               sample_info$ko_strategy, sep="_")
  print(table(sample_info$ko_group))
  print(length(unique(sample_info$ko_group)))
  print(table(sample_info$run_id, sample_info$ko_group))
  
  print(table(sample_info$ko_group, sample_info$day))
  
  sample_info$ko_group_run_id = paste(sample_info$ko_group, 
                                      sample_info$run_id_short, sep="_")

  print(table(sample_info$ko_gene, sample_info$day)) 
  print(table(sample_info$ko_group_run_id, sample_info$day)) 
  
  # print the table of ko_group and run_id by day
  for (cur_day in as.character(unique_timepoints)){
    sample_info_by_day = sample_info[which(sample_info$day==cur_day), ]
    print("---------------------------")
    print(paste0("On day ", as.character(cur_day), ": "))
    print(table(sample_info_by_day$ko_group, sample_info_by_day$run_id_short))     
  }
  
  sample_info$log_q75 = log(sample_info$q75)
  
  
  # run DESeq2 for different day groups separately
  # day 3 + day 4
  # day 5
  # day 6
  # day 7
  # day 8 (not excluding RFX4 for now)
  # day 9 (do not include day 14 in this setting)
  # do not include run_id in the model if there is only one run_id in certain day
  
  sample_info$day_group = as.character(sample_info$day)
  sample_info$day_group[which(sample_info$day_group%in%c("3", "4"))] = "3_4"
  table(sample_info$day_group)

  unique_day_groups = setdiff(unique(sample_info$day_group), "14")
  unique_day_groups = sort(unique_day_groups)
  
  for (d_group in unique_day_groups){
    
    print("===================================")
    print(sprintf("day group %s DESeq2 results:", d_group))
    print("===================================")  
    
    dg_index = which(sample_info$day_group==d_group)
    cts_dat_m_dg = cts_dat_m[, dg_index]
    sample_info_dg = sample_info[dg_index, ]
    
    stopifnot(all(sample_info_dg$day_group==d_group))
    
    q75_gene  = apply(cts_dat_m_dg, 1, quantile, prob = 0.75)
    ngt0_gene = rowSums(cts_dat_m_dg > 0)
    print("table(q75_gene > 0, ngt0_gene > 3)")
    print(table(q75_gene > 0, ngt0_gene > 3))

    cts_dat_m_dg = cts_dat_m_dg[which(q75_gene > 0 & ngt0_gene > 3),]
    # add day ID as another covariate for day group 3_4
    # but cannot do that for day group 9_14,
    # since day 14 completely overlaps with run ID scbct-092
    # for day 9 and day 14, only run analysis on day 9 samples
    
    # since day 3_4 only has one run id
    # write out the setting for day 3_4 specifically
    if (d_group == "3_4"){
      dds = DESeqDataSetFromMatrix(cts_dat_m_dg, colData=sample_info_dg, 
                                    ~ ko_group + day + log_q75)      
    }else if (length(unique(sample_info_dg$run_id))==1){
      dds = DESeqDataSetFromMatrix(cts_dat_m_dg, colData=sample_info_dg, 
                                    ~ ko_group + log_q75)
    }else{
      dds = DESeqDataSetFromMatrix(cts_dat_m_dg, colData=sample_info_dg, 
                                    ~ ko_group + run_id + log_q75)     
    }
  
    start.time <- Sys.time()
    dds = DESeq(dds)
    end.time <- Sys.time()
      
    time.taken <- end.time - start.time
    print(time.taken)
    
    ## association with read-depth
    res_rd = results(dds, name = "log_q75")
    res_rd = as.data.frame(res_rd)
    dim(res_rd)
    res_rd[1:2,]
    
    table(is.na(res_rd$padj))
  
    g0 = ggplot(res_rd %>% subset(!is.na(padj)), aes(x=pvalue)) + 
      geom_histogram(color="darkblue", fill="lightblue", 
                     breaks = seq(0,1,by=0.01)) + 
      ggtitle(paste0(model1, " day group ", d_group, " read depth"))
    print(g0)
  
   ## Rerun the analysis without read-depth if it is not significant for 
    ## most genes.
    ## i.e., proportion of non-null in the distribution is smaller than 0.1

    pi_1_rd = max(0, mean(res_rd$pvalue < 0.05) - 0.05)
    pi_1_rd = max(pi_1_rd, 0, 1 - 2*mean(res_rd$pvalue > 0.5))
    
    print(sprintf("prop of non-null for rd: %.2e", pi_1_rd))
    
    if(pi_1_rd < 0.01 || (ncol(cts_dat_m_dg) <= 6 && pi_1_rd < 0.1 )){
      include_rd = 0
      if (length(unique(sample_info_dg$run_id))==1){
        dds = DESeqDataSetFromMatrix(cts_dat_m_dg, colData=sample_info_dg, 
                                      ~ ko_group)
      }else{
        dds = DESeqDataSetFromMatrix(cts_dat_m_dg, colData=sample_info_dg, 
                                      ~ ko_group + run_id)     
      }
      dds = DESeq(dds)
    }else{
      include_rd = 1
    }

    ## association with run_id
      
    if (length(unique(sample_info_dg$run_id))>1){
      
      if(include_rd)
        dds_lrt = DESeq(dds, test="LRT", reduced = ~ ko_group + log_q75)
      else
        dds_lrt = DESeq(dds, test="LRT", reduced = ~ ko_group)

      res_lrt = results(dds_lrt)
    
      res_run_id = as.data.frame(res_lrt)
      dim(res_run_id)
      res_run_id[1:2,]
    
      table(is.na(res_run_id$padj))
      
      g0 = ggplot(res_run_id %>% subset(!is.na(padj)), aes(x=pvalue)) + 
        geom_histogram(color="darkblue", fill="lightblue", 
                       breaks = seq(0,1,by=0.01)) + 
        ggtitle(paste0(model1, " day group ", d_group, " Run ID"))
      print(g0)
    
    }
    
    ## DE analysis for each KO gene
    table(sample_info_dg$ko_gene)
    
    genos = setdiff(unique(sample_info_dg$ko_gene), "WT")
    genos
    
    ko_grp  = c("CE", "KO", "PTC")
    res_geno_df = NULL
    
    for(geno in genos){
      
      res_geno = list()
      
      for(k_grp1 in ko_grp){
        
        res_g1 = results(dds, contrast = c("ko_group", 
                                           paste0(geno, "_", k_grp1), "WT_WT"))
        res_g1 = signif(data.frame(res_g1), 3)
        res_geno[[k_grp1]] = res_g1
        
        g1 = ggplot(res_g1 %>% subset(!is.na(padj)), aes(x=pvalue)) + 
          geom_histogram(color="darkblue", fill="lightblue", 
                         breaks = seq(0,1,by=0.01)) + 
          ggtitle(paste0(model1, " day group ", d_group, " ", geno, "_", k_grp1))
        print(g1)
  
        tag1 = sprintf("%s_%s_%s_%s_", model1, d_group, geno, k_grp1)
        colnames(res_g1) = paste0(tag1, colnames(res_g1))
        
        if(is.null(res_geno_df)){
          res_geno_df = res_g1
        }else if(!is.null(res_geno_df)){
          stopifnot(all(rownames(res_geno_df) == rownames(res_g1)))
          res_geno_df = cbind(res_geno_df, res_g1)
        }
      }
      
      get_index <- function(x){
        which(x$padj < 0.05 & abs(x$log2FoldChange) > log2(1.5))
      }
      
      w_ce = get_index(res_geno[["CE"]])
      w_ko = get_index(res_geno[["KO"]])
      w_ptc = get_index(res_geno[["PTC"]])
  
      print(paste0(model1, " day group ", d_group, " under KO gene ", geno, ":"))
      
      print(paste0("number of DE genes from knock out strategy CE: ", 
                   as.character(length(w_ce))))
      print(paste0("number of DE genes from knock out strategy KO: ", 
                   as.character(length(w_ko))))
      print(paste0("number of DE genes from knock out strategy PTC: ", 
                   as.character(length(w_ptc))))
      
      ce_ko_overlap = length(intersect(rownames(res_geno[["CE"]])[w_ce], 
                                       rownames(res_geno[["KO"]])[w_ko]))
      
      ko_ptc_overlap = length(intersect(rownames(res_geno[["KO"]])[w_ko], 
                                        rownames(res_geno[["PTC"]])[w_ptc]))
      
      ptc_ce_overlap = length(intersect(rownames(res_geno[["PTC"]])[w_ptc], 
                                        rownames(res_geno[["CE"]])[w_ce]))
      
      print(paste0("number of common DE genes by knock out strategies CE and KO: ", 
                   as.character(ce_ko_overlap)))
      print(paste0("number of common DE genes bu knock out strategies KO and PTC: ", 
                   as.character(ko_ptc_overlap)))
      print(paste0("number of common DE genes by knock out strategies PTC and CE: ", 
                   as.character(ptc_ce_overlap)))
            
      if (min(ce_ko_overlap, ko_ptc_overlap, ptc_ce_overlap) > 0){
        m = make_comb_mat(list("CE" = rownames(res_geno[["CE"]])[w_ce], 
                  "KO" = rownames(res_geno[["KO"]])[w_ko], 
                  "PTC" = rownames(res_geno[["PTC"]])[w_ptc]))
        g_up = UpSet(m)
        print(g_up)
      }
      
      df1 = data.frame(padj_CE = res_geno[["CE"]]$padj, 
                       padj_KO = res_geno[["KO"]]$padj, 
                       padj_PTC = res_geno[["PTC"]]$padj)
  
      cr1 = cor(df1$padj_PTC, df1$padj_CE, method = "spearman", use="pair")
      cr2 = cor(df1$padj_PTC, df1$padj_KO, method = "spearman", use="pair")
      
      g4 = ggplot(data = df1, mapping = aes(x = -log10(padj_PTC), 
                                            y = -log10(padj_CE))) +
        geom_pointdensity() + 
        ggtitle(sprintf("%s day group %s, %s, Spearman r = %.2f", 
                        model1, d_group, geno, cr1)) + 
        scale_color_viridis()
      
      g5 = ggplot(data = df1, mapping = aes(x = -log10(padj_PTC), 
                                            y = -log10(padj_KO))) +
        geom_pointdensity() + 
        ggtitle(sprintf("%s day group %s, %s, Spearman r = %.2f", 
                        model1, d_group, geno, cr2)) + 
        scale_color_viridis()
      
      print(g4)
      print(g5)
    }
  
    dim(res_geno_df)
    res_geno_df[1:2,]
    
    res_df = data.frame(gene_ID=rownames(res_geno_df), res_geno_df)
    dim(res_df)
    res_df[1:2,]

    output_dir = "results/2024_06_JAX_RNAseq_Neuroectoderm"
    
    if (!file.exists(output_dir)){
      dir.create(output_dir)
    }
  
    fwrite(res_df, 
           sprintf("%s/2024_06_JAX_RNAseq_Neuroectoderm_%s_%s_DEseq2.tsv", 
                   output_dir, model1, d_group), 
           sep="\t")
    
  }
  
}
## [1] "CBO"

## [1] 7
## [1] 9

##                 
##                  FEZF2 LHX5 LHX9 OTX1 PAX6 RFX4 WT
##   robson-003         0    0    0    0    0    9  1
##   robson-004         0    9    0    0    0    0  3
##   robson-006         9    0    0    0    0    0  3
##   robson-020         0    0    0   27    0    0  3
##   scbct-008          0    0    0    0    9    0  1
##   scbct-028-run3     0    0    0    0    0   27  3
##   scbct-039-run2    27    0    0    0    0    0  3
##   scbct-052          0   27    0    0    0    0  3
##   scbct-092          0    0    9    0    0    0  1

## 
##  FEZF2_CE  FEZF2_KO FEZF2_PTC   LHX5_CE   LHX5_KO  LHX5_PTC   LHX9_CE   LHX9_KO 
##        12        12        12        12        12        12         3         3 
##  LHX9_PTC   OTX1_CE   OTX1_KO  OTX1_PTC   PAX6_CE   PAX6_KO  PAX6_PTC   RFX4_CE 
##         3         9         9         9         3         3         3        12 
##   RFX4_KO  RFX4_PTC     WT_WT 
##        12        12        21 
## [1] 19
##                             
##                              FEZF2_CE FEZF2_KO FEZF2_PTC LHX5_CE LHX5_KO
##   20230228_23-scbct-008             0        0         0       0       0
##   20230424_23-scbct-028-run3        0        0         0       0       0
##   20230508_23-scbct-039-run2        9        9         9       0       0
##   20230522_23-scbct-052             0        0         0       9       9
##   20230824_23-scbct-092             0        0         0       0       0
##   20240131_23-robson-020            0        0         0       0       0
##   20240307_24-robson-003            0        0         0       0       0
##   20240307_24-robson-004            0        0         0       3       3
##   20240307_24-robson-006            3        3         3       0       0
##                             
##                              LHX5_PTC LHX9_CE LHX9_KO LHX9_PTC OTX1_CE OTX1_KO
##   20230228_23-scbct-008             0       0       0        0       0       0
##   20230424_23-scbct-028-run3        0       0       0        0       0       0
##   20230508_23-scbct-039-run2        0       0       0        0       0       0
##   20230522_23-scbct-052             9       0       0        0       0       0
##   20230824_23-scbct-092             0       3       3        3       0       0
##   20240131_23-robson-020            0       0       0        0       9       9
##   20240307_24-robson-003            0       0       0        0       0       0
##   20240307_24-robson-004            3       0       0        0       0       0
##   20240307_24-robson-006            0       0       0        0       0       0
##                             
##                              OTX1_PTC PAX6_CE PAX6_KO PAX6_PTC RFX4_CE RFX4_KO
##   20230228_23-scbct-008             0       3       3        3       0       0
##   20230424_23-scbct-028-run3        0       0       0        0       9       9
##   20230508_23-scbct-039-run2        0       0       0        0       0       0
##   20230522_23-scbct-052             0       0       0        0       0       0
##   20230824_23-scbct-092             0       0       0        0       0       0
##   20240131_23-robson-020            9       0       0        0       0       0
##   20240307_24-robson-003            0       0       0        0       3       3
##   20240307_24-robson-004            0       0       0        0       0       0
##   20240307_24-robson-006            0       0       0        0       0       0
##                             
##                              RFX4_PTC WT_WT
##   20230228_23-scbct-008             0     1
##   20230424_23-scbct-028-run3        9     3
##   20230508_23-scbct-039-run2        0     3
##   20230522_23-scbct-052             0     3
##   20230824_23-scbct-092             0     1
##   20240131_23-robson-020            0     3
##   20240307_24-robson-003            3     1
##   20240307_24-robson-004            0     3
##   20240307_24-robson-006            0     3
##            
##             3 4 5 6 7 8 9 14
##   FEZF2_CE  0 0 3 3 3 3 0  0
##   FEZF2_KO  0 0 3 3 3 3 0  0
##   FEZF2_PTC 0 0 3 3 3 3 0  0
##   LHX5_CE   3 3 3 3 0 0 0  0
##   LHX5_KO   3 3 3 3 0 0 0  0
##   LHX5_PTC  3 3 3 3 0 0 0  0
##   LHX9_CE   0 0 0 0 0 0 0  3
##   LHX9_KO   0 0 0 0 0 0 0  3
##   LHX9_PTC  0 0 0 0 0 0 0  3
##   OTX1_CE   0 0 0 0 3 3 3  0
##   OTX1_KO   0 0 0 0 3 3 3  0
##   OTX1_PTC  0 0 0 0 3 3 3  0
##   PAX6_CE   0 0 0 0 3 0 0  0
##   PAX6_KO   0 0 0 0 3 0 0  0
##   PAX6_PTC  0 0 0 0 3 0 0  0
##   RFX4_CE   0 0 0 3 3 3 3  0
##   RFX4_KO   0 0 0 3 3 3 3  0
##   RFX4_PTC  0 0 0 3 3 3 3  0
##   WT_WT     1 1 2 5 4 5 2  1
##        
##         3 4 5 6 7 8 9 14
##   FEZF2 0 0 9 9 9 9 0  0
##   LHX5  9 9 9 9 0 0 0  0
##   LHX9  0 0 0 0 0 0 0  9
##   OTX1  0 0 0 0 9 9 9  0
##   PAX6  0 0 0 0 9 0 0  0
##   RFX4  0 0 0 9 9 9 9  0
##   WT    1 1 2 5 4 5 2  1
##                           
##                            3 4 5 6 7 8 9 14
##   FEZF2_CE_robson-006      0 0 0 0 0 3 0  0
##   FEZF2_CE_scbct-039-run2  0 0 3 3 3 0 0  0
##   FEZF2_KO_robson-006      0 0 0 0 0 3 0  0
##   FEZF2_KO_scbct-039-run2  0 0 3 3 3 0 0  0
##   FEZF2_PTC_robson-006     0 0 0 0 0 3 0  0
##   FEZF2_PTC_scbct-039-run2 0 0 3 3 3 0 0  0
##   LHX5_CE_robson-004       0 0 0 3 0 0 0  0
##   LHX5_CE_scbct-052        3 3 3 0 0 0 0  0
##   LHX5_KO_robson-004       0 0 0 3 0 0 0  0
##   LHX5_KO_scbct-052        3 3 3 0 0 0 0  0
##   LHX5_PTC_robson-004      0 0 0 3 0 0 0  0
##   LHX5_PTC_scbct-052       3 3 3 0 0 0 0  0
##   LHX9_CE_scbct-092        0 0 0 0 0 0 0  3
##   LHX9_KO_scbct-092        0 0 0 0 0 0 0  3
##   LHX9_PTC_scbct-092       0 0 0 0 0 0 0  3
##   OTX1_CE_robson-020       0 0 0 0 3 3 3  0
##   OTX1_KO_robson-020       0 0 0 0 3 3 3  0
##   OTX1_PTC_robson-020      0 0 0 0 3 3 3  0
##   PAX6_CE_scbct-008        0 0 0 0 3 0 0  0
##   PAX6_KO_scbct-008        0 0 0 0 3 0 0  0
##   PAX6_PTC_scbct-008       0 0 0 0 3 0 0  0
##   RFX4_CE_robson-003       0 0 0 0 0 0 3  0
##   RFX4_CE_scbct-028-run3   0 0 0 3 3 3 0  0
##   RFX4_KO_robson-003       0 0 0 0 0 0 3  0
##   RFX4_KO_scbct-028-run3   0 0 0 3 3 3 0  0
##   RFX4_PTC_robson-003      0 0 0 0 0 0 3  0
##   RFX4_PTC_scbct-028-run3  0 0 0 3 3 3 0  0
##   WT_WT_robson-003         0 0 0 0 0 0 1  0
##   WT_WT_robson-004         0 0 0 3 0 0 0  0
##   WT_WT_robson-006         0 0 0 0 0 3 0  0
##   WT_WT_robson-020         0 0 0 0 1 1 1  0
##   WT_WT_scbct-008          0 0 0 0 1 0 0  0
##   WT_WT_scbct-028-run3     0 0 0 1 1 1 0  0
##   WT_WT_scbct-039-run2     0 0 1 1 1 0 0  0
##   WT_WT_scbct-052          1 1 1 0 0 0 0  0
##   WT_WT_scbct-092          0 0 0 0 0 0 0  1
## [1] "---------------------------"
## [1] "On day 3: "
##           
##            scbct-052
##   LHX5_CE          3
##   LHX5_KO          3
##   LHX5_PTC         3
##   WT_WT            1
## [1] "---------------------------"
## [1] "On day 4: "
##           
##            scbct-052
##   LHX5_CE          3
##   LHX5_KO          3
##   LHX5_PTC         3
##   WT_WT            1
## [1] "---------------------------"
## [1] "On day 5: "
##            
##             scbct-039-run2 scbct-052
##   FEZF2_CE               3         0
##   FEZF2_KO               3         0
##   FEZF2_PTC              3         0
##   LHX5_CE                0         3
##   LHX5_KO                0         3
##   LHX5_PTC               0         3
##   WT_WT                  1         1
## [1] "---------------------------"
## [1] "On day 6: "
##            
##             robson-004 scbct-028-run3 scbct-039-run2
##   FEZF2_CE           0              0              3
##   FEZF2_KO           0              0              3
##   FEZF2_PTC          0              0              3
##   LHX5_CE            3              0              0
##   LHX5_KO            3              0              0
##   LHX5_PTC           3              0              0
##   RFX4_CE            0              3              0
##   RFX4_KO            0              3              0
##   RFX4_PTC           0              3              0
##   WT_WT              3              1              1
## [1] "---------------------------"
## [1] "On day 7: "
##            
##             robson-020 scbct-008 scbct-028-run3 scbct-039-run2
##   FEZF2_CE           0         0              0              3
##   FEZF2_KO           0         0              0              3
##   FEZF2_PTC          0         0              0              3
##   OTX1_CE            3         0              0              0
##   OTX1_KO            3         0              0              0
##   OTX1_PTC           3         0              0              0
##   PAX6_CE            0         3              0              0
##   PAX6_KO            0         3              0              0
##   PAX6_PTC           0         3              0              0
##   RFX4_CE            0         0              3              0
##   RFX4_KO            0         0              3              0
##   RFX4_PTC           0         0              3              0
##   WT_WT              1         1              1              1
## [1] "---------------------------"
## [1] "On day 8: "
##            
##             robson-006 robson-020 scbct-028-run3
##   FEZF2_CE           3          0              0
##   FEZF2_KO           3          0              0
##   FEZF2_PTC          3          0              0
##   OTX1_CE            0          3              0
##   OTX1_KO            0          3              0
##   OTX1_PTC           0          3              0
##   RFX4_CE            0          0              3
##   RFX4_KO            0          0              3
##   RFX4_PTC           0          0              3
##   WT_WT              3          1              1
## [1] "---------------------------"
## [1] "On day 9: "
##           
##            robson-003 robson-020
##   OTX1_CE           0          3
##   OTX1_KO           0          3
##   OTX1_PTC          0          3
##   RFX4_CE           3          0
##   RFX4_KO           3          0
##   RFX4_PTC          3          0
##   WT_WT             1          1
## [1] "---------------------------"
## [1] "On day 14: "
##           
##            scbct-092
##   LHX9_CE          3
##   LHX9_KO          3
##   LHX9_PTC         3
##   WT_WT            1
## [1] "==================================="
## [1] "day group 3_4 DESeq2 results:"
## [1] "==================================="
## [1] "table(q75_gene > 0, ngt0_gene > 3)"
##        
##         FALSE  TRUE
##   FALSE   716   244
##   TRUE      0 23242
## Time difference of 12.24709 secs

## [1] "prop of non-null for rd: 0.00e+00"

## [1] "CBO day group 3_4 under KO gene LHX5:"
## [1] "number of DE genes from knock out strategy CE: 1"
## [1] "number of DE genes from knock out strategy KO: 4"
## [1] "number of DE genes from knock out strategy PTC: 5"
## [1] "number of common DE genes by knock out strategies CE and KO: 0"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 3"
## [1] "number of common DE genes by knock out strategies PTC and CE: 0"

## [1] "==================================="
## [1] "day group 5 DESeq2 results:"
## [1] "==================================="
## [1] "table(q75_gene > 0, ngt0_gene > 3)"
##        
##         FALSE  TRUE
##   FALSE   422   213
##   TRUE      0 23567
## Time difference of 20.52245 secs

## [1] "prop of non-null for rd: 0.00e+00"

## [1] "CBO day group 5 under KO gene LHX5:"
## [1] "number of DE genes from knock out strategy CE: 2"
## [1] "number of DE genes from knock out strategy KO: 172"
## [1] "number of DE genes from knock out strategy PTC: 1"
## [1] "number of common DE genes by knock out strategies CE and KO: 1"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 0"
## [1] "number of common DE genes by knock out strategies PTC and CE: 1"

## [1] "CBO day group 5 under KO gene FEZF2:"
## [1] "number of DE genes from knock out strategy CE: 182"
## [1] "number of DE genes from knock out strategy KO: 9"
## [1] "number of DE genes from knock out strategy PTC: 22"
## [1] "number of common DE genes by knock out strategies CE and KO: 9"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 4"
## [1] "number of common DE genes by knock out strategies PTC and CE: 20"

## [1] "==================================="
## [1] "day group 6 DESeq2 results:"
## [1] "==================================="
## [1] "table(q75_gene > 0, ngt0_gene > 3)"
##        
##         FALSE  TRUE
##   FALSE    55   236
##   TRUE      0 23911
## Time difference of 22.15593 secs

## [1] "prop of non-null for rd: 1.21e-01"

## [1] "CBO day group 6 under KO gene LHX5:"
## [1] "number of DE genes from knock out strategy CE: 100"
## [1] "number of DE genes from knock out strategy KO: 22"
## [1] "number of DE genes from knock out strategy PTC: 32"
## [1] "number of common DE genes by knock out strategies CE and KO: 13"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 12"
## [1] "number of common DE genes by knock out strategies PTC and CE: 20"

## [1] "CBO day group 6 under KO gene FEZF2:"
## [1] "number of DE genes from knock out strategy CE: 37"
## [1] "number of DE genes from knock out strategy KO: 32"
## [1] "number of DE genes from knock out strategy PTC: 41"
## [1] "number of common DE genes by knock out strategies CE and KO: 0"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 22"
## [1] "number of common DE genes by knock out strategies PTC and CE: 1"

## [1] "CBO day group 6 under KO gene RFX4:"
## [1] "number of DE genes from knock out strategy CE: 45"
## [1] "number of DE genes from knock out strategy KO: 48"
## [1] "number of DE genes from knock out strategy PTC: 41"
## [1] "number of common DE genes by knock out strategies CE and KO: 33"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 33"
## [1] "number of common DE genes by knock out strategies PTC and CE: 30"

## [1] "==================================="
## [1] "day group 7 DESeq2 results:"
## [1] "==================================="
## [1] "table(q75_gene > 0, ngt0_gene > 3)"
##        
##         FALSE  TRUE
##   FALSE    54   483
##   TRUE      0 23665
## Time difference of 56.84309 secs

## [1] "prop of non-null for rd: 0.00e+00"

## [1] "CBO day group 7 under KO gene FEZF2:"
## [1] "number of DE genes from knock out strategy CE: 1"
## [1] "number of DE genes from knock out strategy KO: 5"
## [1] "number of DE genes from knock out strategy PTC: 2"
## [1] "number of common DE genes by knock out strategies CE and KO: 0"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 1"
## [1] "number of common DE genes by knock out strategies PTC and CE: 0"

## [1] "CBO day group 7 under KO gene PAX6:"
## [1] "number of DE genes from knock out strategy CE: 1"
## [1] "number of DE genes from knock out strategy KO: 1"
## [1] "number of DE genes from knock out strategy PTC: 1"
## [1] "number of common DE genes by knock out strategies CE and KO: 1"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 1"
## [1] "number of common DE genes by knock out strategies PTC and CE: 1"

## [1] "CBO day group 7 under KO gene RFX4:"
## [1] "number of DE genes from knock out strategy CE: 3"
## [1] "number of DE genes from knock out strategy KO: 0"
## [1] "number of DE genes from knock out strategy PTC: 0"
## [1] "number of common DE genes by knock out strategies CE and KO: 0"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 0"
## [1] "number of common DE genes by knock out strategies PTC and CE: 0"

## [1] "CBO day group 7 under KO gene OTX1:"
## [1] "number of DE genes from knock out strategy CE: 4"
## [1] "number of DE genes from knock out strategy KO: 1"
## [1] "number of DE genes from knock out strategy PTC: 5"
## [1] "number of common DE genes by knock out strategies CE and KO: 1"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 1"
## [1] "number of common DE genes by knock out strategies PTC and CE: 3"

## [1] "==================================="
## [1] "day group 8 DESeq2 results:"
## [1] "==================================="
## [1] "table(q75_gene > 0, ngt0_gene > 3)"
##        
##         FALSE  TRUE
##   FALSE   311   623
##   TRUE      0 23268
## Time difference of 28.15195 secs

## [1] "prop of non-null for rd: 1.41e-02"

## [1] "CBO day group 8 under KO gene RFX4:"
## [1] "number of DE genes from knock out strategy CE: 74"
## [1] "number of DE genes from knock out strategy KO: 70"
## [1] "number of DE genes from knock out strategy PTC: 72"
## [1] "number of common DE genes by knock out strategies CE and KO: 55"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 53"
## [1] "number of common DE genes by knock out strategies PTC and CE: 59"

## [1] "CBO day group 8 under KO gene OTX1:"
## [1] "number of DE genes from knock out strategy CE: 37"
## [1] "number of DE genes from knock out strategy KO: 84"
## [1] "number of DE genes from knock out strategy PTC: 47"
## [1] "number of common DE genes by knock out strategies CE and KO: 17"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 26"
## [1] "number of common DE genes by knock out strategies PTC and CE: 16"

## [1] "CBO day group 8 under KO gene FEZF2:"
## [1] "number of DE genes from knock out strategy CE: 37"
## [1] "number of DE genes from knock out strategy KO: 71"
## [1] "number of DE genes from knock out strategy PTC: 64"
## [1] "number of common DE genes by knock out strategies CE and KO: 22"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 28"
## [1] "number of common DE genes by knock out strategies PTC and CE: 23"

## [1] "==================================="
## [1] "day group 9 DESeq2 results:"
## [1] "==================================="
## [1] "table(q75_gene > 0, ngt0_gene > 3)"
##        
##         FALSE  TRUE
##   FALSE   986   293
##   TRUE      0 22923
## Time difference of 10.55259 secs

## [1] "prop of non-null for rd: 6.44e-02"

## [1] "CBO day group 9 under KO gene RFX4:"
## [1] "number of DE genes from knock out strategy CE: 59"
## [1] "number of DE genes from knock out strategy KO: 79"
## [1] "number of DE genes from knock out strategy PTC: 54"
## [1] "number of common DE genes by knock out strategies CE and KO: 39"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 37"
## [1] "number of common DE genes by knock out strategies PTC and CE: 34"

## [1] "CBO day group 9 under KO gene OTX1:"
## [1] "number of DE genes from knock out strategy CE: 52"
## [1] "number of DE genes from knock out strategy KO: 70"
## [1] "number of DE genes from knock out strategy PTC: 61"
## [1] "number of common DE genes by knock out strategies CE and KO: 31"
## [1] "number of common DE genes bu knock out strategies KO and PTC: 35"
## [1] "number of common DE genes by knock out strategies PTC and CE: 35"

gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  7735482 413.2   15631714 834.9         NA 15631714 834.9
## Vcells 37947643 289.6   99641649 760.3      65536 99641649 760.3
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] readxl_1.4.2                UpSetR_1.4.0               
##  [3] ComplexHeatmap_2.14.0       data.table_1.14.8          
##  [5] dplyr_1.1.2                 ggpointdensity_0.1.0       
##  [7] viridis_0.6.2               viridisLite_0.4.1          
##  [9] DESeq2_1.38.3               SummarizedExperiment_1.28.0
## [11] Biobase_2.58.0              MatrixGenerics_1.10.0      
## [13] matrixStats_1.0.0           GenomicRanges_1.50.2       
## [15] GenomeInfoDb_1.34.9         IRanges_2.32.0             
## [17] S4Vectors_0.36.2            BiocGenerics_0.44.0        
## [19] RColorBrewer_1.1-3          MASS_7.3-58.2              
## [21] stringr_1.5.0               ggpubr_0.6.0               
## [23] ggrepel_0.9.3               ggplot2_3.4.2              
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_2.1-0       ggsignif_0.6.4         rjson_0.2.21          
##  [4] circlize_0.4.15        XVector_0.38.0         GlobalOptions_0.1.2   
##  [7] clue_0.3-65            rstudioapi_0.14        farver_2.1.1          
## [10] bit64_4.0.5            AnnotationDbi_1.60.2   fansi_1.0.4           
## [13] codetools_0.2-19       doParallel_1.0.17      cachem_1.0.7          
## [16] geneplotter_1.76.0     knitr_1.44             jsonlite_1.8.4        
## [19] broom_1.0.4            annotate_1.76.0        cluster_2.1.4         
## [22] png_0.1-8              compiler_4.2.3         httr_1.4.6            
## [25] backports_1.4.1        Matrix_1.6-4           fastmap_1.1.1         
## [28] cli_3.6.1              htmltools_0.5.5        tools_4.2.3           
## [31] gtable_0.3.3           glue_1.6.2             GenomeInfoDbData_1.2.9
## [34] Rcpp_1.0.10            carData_3.0-5          cellranger_1.1.0      
## [37] jquerylib_0.1.4        vctrs_0.6.2            Biostrings_2.66.0     
## [40] iterators_1.0.14       xfun_0.39              lifecycle_1.0.3       
## [43] rstatix_0.7.2          XML_3.99-0.14          zlibbioc_1.44.0       
## [46] scales_1.2.1           parallel_4.2.3         yaml_2.3.7            
## [49] memoise_2.0.1          gridExtra_2.3          sass_0.4.5            
## [52] stringi_1.7.12         RSQLite_2.3.1          foreach_1.5.2         
## [55] BiocParallel_1.32.6    shape_1.4.6            rlang_1.1.0           
## [58] pkgconfig_2.0.3        bitops_1.0-7           evaluate_0.20         
## [61] lattice_0.20-45        purrr_1.0.1            labeling_0.4.2        
## [64] bit_4.0.5              tidyselect_1.2.0       plyr_1.8.8            
## [67] magrittr_2.0.3         R6_2.5.1               generics_0.1.3        
## [70] DelayedArray_0.24.0    DBI_1.1.3              pillar_1.9.0          
## [73] withr_2.5.0            KEGGREST_1.38.0        abind_1.4-5           
## [76] RCurl_1.98-1.12        tibble_3.2.1           crayon_1.5.2          
## [79] car_3.1-2              utf8_1.2.3             rmarkdown_2.21        
## [82] GetoptLong_1.0.5       locfit_1.5-9.8         blob_1.2.4            
## [85] digest_0.6.31          xtable_1.8-4           tidyr_1.3.0           
## [88] munsell_0.5.0          bslib_0.4.2